library(rgdal)
library(raster)
library(sp)
library(rgeos)
library(plyr)
library(sf)

## set this to the appropriate directory
setwd("C:\\Papers\\SF_natural_experiment\\Replication_Files")

## Part 1: building the precinct polygons for 1989 ##

## import the consolidated precinct turnout information
m_pr = read.csv("consolidated_precincts.csv", header=T, stringsAsFactors = F)

## import the 1987 precinct polygons
p = readOGR(dsn="districts87.geojson", stringsAsFactors = F)

## convert them to SpatialPolygons class
sp_poly = SpatialPolygons(Srl=p@polygons, pO=p@plotOrder, proj4string = CRS(proj4string(p)))

## import the KML file with the shape of Treasure Island.  It's precinct 2102
ti = readOGR(dsn="ti.kml", layer="ti")

## make new polygons
merged_poly = list()
for (j in 1:nrow(m_pr)) {
  ## handle precinct 2102 - Treasure Island - separately
  if (m_pr$Precinct[j] == "2102") {
    merged_poly[[j]] = ti@polygons[[1]]
    merged_poly[[j]]@ID = "2102"
    next
  }
  
  if (!grepl("/ ", m_pr$Precinct[j])) {
    v = as.numeric(m_pr$Precinct[j])
    k = match(v, p@data$precinct)
    merged_poly[[j]] = sp_poly[k]@polygons[[1]]
    merged_poly[[j]]@ID = m_pr$Precinct[j]
  } else {
    v = as.numeric(strsplit(m_pr$Precinct[j], "/ ")[[1]])
    k = match(v, p@data$precinct)
    merged_poly[[j]] = gUnionCascaded(sp_poly[k])@polygons[[1]]
    merged_poly[[j]]@ID = m_pr$Precinct[j]
  }
}

## convert the list to class SpatialPolygons
sp_m_poly = SpatialPolygons(Srl=merged_poly, pO=1:nrow(m_pr),
                            proj4string = CRS(proj4string(p)))

## display for diagnostics
## The map does not show the park areas: 
## Golden Gate Park, Land’s End, Fort Mason, Mission Dolores, and the MacLaren park and its golf course. 
## The parks were not part of any precinct and thus are excluded from the geometry.
plot(sp_m_poly, col="lightblue")

## Part 2: Prepare the weather data ##

## get the list of raster files
flist = list.files(path="./Landsat5_data", 
                   pattern="B6\\.TIF$",
                   full.names=F, 
                   recursive=F, include.dirs=F, ignore.case=T)

fd = read.csv("Landsat5_data\\file_date.csv", header=T, stringsAsFactors = F)
fd$Text = strftime(strptime(fd$Date, "%m/%d/%Y"), format="%Y%m%d")

city_weather = read.csv("weather_data\\SF_80_90.csv", header=T, stringsAsFactors=F)

temp = merge(x=fd, by="Text", y=city_weather[ , c(3:6)], by.y="DATE")

## Part 3: calculate mean pixel intensity for each polygon ##

## Note: This is a very slow operation. It takes about 3 minutes to process each raster file. With 32 raster files, this step will take up to an hour and a half.

df = m_pr

for (j in 1:length(flist)) {
  cat("Processing", flist[j], "\n")
  f = raster(paste0("./Landsat5_data/", flist[j]))
  tmp = extract(f, sp_m_poly, fun=mean, na.rm=T, df=T)
  df[ , fd$Text[fd$File == flist[j]] ] = tmp[, 2]
}

## Part 4: Estimating models for each precinct ##

df$int_coef = 0
df$t_coef = 0
df$we_coef = 0
df$sun_elev_coef = 0
df$sun_azim_coef = 0

df$t_int = 0
df$t_t = 0
df$t_we = 0
df$t_sun_elev = 0
df$t_sun_azim = 0

df$adj_r_sq = 0
df$residual = 0
df$res_ratio = 0

t = temp$TMAX[1:31]
we = ifelse(temp$Weekday[1:31] > 5, 1, 0)
sun_elev = temp$SunElev[1:31]
sun_azim = temp$SunAzim[1:31]

used_columns = match(temp$Text[1:31], gsub("^X", "", names(df)))

nov_89_data = data.frame(t=temp$TMAX[temp$Text == "19891108"],
                         we=ifelse(temp$Weekday[temp$Text == "19891108"]>5, 1, 0),
                         sun_elev = temp$SunElev[temp$Text == "19891108"],
                         sun_azim = temp$SunAzim[temp$Text == "19891108"])

for (j in 1:nrow(df)) {

  ## convert a dataframe row into a matrix, then a vector
  y = as.matrix(df[j, used_columns])[1, ]
  
  m = lm(y ~ t + we + sun_elev + sun_azim)
  
  v = summary(m)
  df$int_coef[j] = v$coefficients["(Intercept)", "Estimate"]
  df$t_coef[j] = v$coefficients["t", "Estimate"]
  df$we_coef[j] = v$coefficients["we", "Estimate"]
  df$sun_elev_coef[j] = v$coefficients["sun_elev", "Estimate"]
  df$sun_azim_coef[j] = v$coefficients["sun_azim", "Estimate"]
  
  df$t_int[j] = v$coefficients["(Intercept)", "t value"]
  df$t_t[j] = v$coefficients["t", "t value"]
  df$t_we[j] = v$coefficients["we", "t value"]
  df$t_sun_elev[j] = v$coefficients["sun_elev", "t value"]
  df$t_sun_azim[j] = v$coefficients["sun_azim", "t value"]
  
  df$adj_r_sq[j] = v$adj.r.squared
  df$residual[j] = df[j, "19891108"] - predict(object=m, newdata=nov_89_data)
  df$res_ratio[j] = df[j, "19891108"]/predict(object=m, newdata=nov_89_data)
}


df$quake_disruption = 100*(1 - df$res_ratio)

write.csv(df, "heat_data.csv", row.names=F)

